{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from deeptables.models import deeptable, deepnets\n",
    "from deeptables.datasets import dsutils\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = dsutils.load_bank()\n",
    "df.drop(['id'], axis=1, inplace=True)\n",
    "df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)\n",
    "y = df_train.pop('y')\n",
    "y_test = df_test.pop('y')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 class detected, {'yes', 'no'}, so inferred as a [binary classification] task\n",
      "Preparing features cost:0.04668307304382324\n",
      "Imputation cost:0.34171605110168457\n",
      "Categorical encoding cost:0.6677968502044678\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/jack/opt/anaconda3/envs/tf_2_0/lib/python3.7/site-packages/sklearn/preprocessing/_discretization.py:202: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
      "  'decreasing the number of bins.' % jj)\n",
      "/Users/jack/opt/anaconda3/envs/tf_2_0/lib/python3.7/site-packages/sklearn/preprocessing/_discretization.py:202: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
      "  'decreasing the number of bins.' % jj)\n",
      "/Users/jack/opt/anaconda3/envs/tf_2_0/lib/python3.7/site-packages/sklearn/preprocessing/_discretization.py:202: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
      "  'decreasing the number of bins.' % jj)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Discretization cost:0.23245501518249512\n",
      "fit_transform cost:1.6466460227966309\n",
      "Injected a callback [EarlyStopping]. monitor:val_AUC, patience:1, mode:max\n",
      ">>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< \n",
      "---------------------------------------------------------\n",
      "inputs:\n",
      "---------------------------------------------------------\n",
      "['all_categorical_vars: (16)', 'input_continuous_all: (7)']\n",
      "---------------------------------------------------------\n",
      "embeddings:\n",
      "---------------------------------------------------------\n",
      "input_dims: [14, 5, 6, 4, 4, 4, 5, 14, 6, 5, 9, 4, 7, 4, 6, 4]\n",
      "output_dims: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]\n",
      "dropout: 0.3\n",
      "---------------------------------------------------------\n",
      "dense: dropout: 0\n",
      "batch_normalization: False\n",
      "---------------------------------------------------------\n",
      "concat_embed_dense: shape: (None, 71)\n",
      "---------------------------------------------------------\n",
      "nets: ['dnn_nets', 'fm_nets', 'linear']\n",
      "---------------------------------------------------------\n",
      "dnn: input_shape (None, 71), output_shape (None, 64)\n",
      "fm: input_shape (None, 16, 4), output_shape (None, 1)\n",
      "linear: input_shape (None, 23), output_shape (None, 1)\n",
      "---------------------------------------------------------\n",
      "stacking_op: add\n",
      "---------------------------------------------------------\n",
      "output: activation: sigmoid, output_shape: (None, 1), use_bias: True\n",
      "loss: binary_crossentropy\n",
      "optimizer: Adam\n",
      "---------------------------------------------------------\n",
      "\n",
      "Train on 69442 samples, validate on 17361 samples\n",
      "69442/69442 [==============================] - 37s 540us/sample - loss: 15.0108 - AUC: 0.6081 - val_loss: 0.3557 - val_AUC: 0.8133\n",
      "Model has been saved to:dt_output/dt_20200929 102015_dnn_nets_fm_nets_linear/dnn_nets+fm_nets+linear.h5\n"
     ]
    }
   ],
   "source": [
    "config = deeptable.ModelConfig(nets=deepnets.DeepFM, auto_discrete=True, metrics=['AUC'])\n",
    "dt = deeptable.DeepTable(config=config)\n",
    "\n",
    "model, history = dt.fit(df_train, y, epochs=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 获取模型中的Layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['input_categorical_vars_all',\n",
       " 'emb_categorical_vars_all',\n",
       " 'concat_embeddings_axis_0',\n",
       " 'flatten_embeddings',\n",
       " 'input_continuous_all',\n",
       " 'concat_embedding_dense',\n",
       " 'bn_concat_emb_dense',\n",
       " 'dnn_dense_1',\n",
       " 'dnn_activation_1',\n",
       " 'concat_linear_embedding',\n",
       " 'dnn_dense_2',\n",
       " 'tf_op_layer_linear_reduce_sum',\n",
       " 'dnn_activation_2',\n",
       " 'concat_fm_embedding',\n",
       " 'concat_linear_emb_dense',\n",
       " 'dense_logit_dnn_nets',\n",
       " 'fm_layer',\n",
       " 'linear_logit',\n",
       " 'add_logits',\n",
       " 'task_output']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[layer.name for layer in model.model.layers]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 获取某个Layer输出的特征向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "transform_X cost:2.4842710494995117\n",
      "apply cost:3.8825080394744873\n"
     ]
    }
   ],
   "source": [
    "features = dt.apply(df_test, output_layers=['dnn_dense_2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(21701, 64)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
